home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Skunkware 5
/
Skunkware 5.iso
/
src
/
Tools
/
man2html-2.0.2
/
man2html.pl
< prev
next >
Wrap
Perl Script
|
1995-06-18
|
14KB
|
448 lines
#! /usr/local/contrib/bin/perl
##---------------------------------------------------------------------------##
## File:
## man2html
## Author:
## Earl Hood ehood@convex.com
## Description:
## man2html is a Perl program to convert formatted nroff output
## to HTML.
##
## Recommend command-line options based on platform:
##
## Platform Options
## ---------------------------------------------------------------------
## c2mp <None, the defaults should be okay>
## hp9000s700/800 -leftm 1 -topm 8
## sun4 -sun
## ---------------------------------------------------------------------
##
##---------------------------------------------------------------------------##
## Copyright (C) 1994 Earl Hood, ehood@convex.com
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
##---------------------------------------------------------------------------##
require 'newgetopt.pl' || die "Unable to require newgetopt.pl\n";
($PROG = $0) =~ s/.*\///;
$VERSION = "2.0.2";
## Backspace character: Used in overstriking detection
$bs = "\b";
## Associative array of section titles and their HTML tag wrapper.
## This list allows customization of what HTML tag is used for
## a given section head.
##
## The section title can be a regular expression. Therefore, one must
## be careful about quoting special characters.
##
%SectionHead = (
'\S.*OPTIONS.*', '<H2>',
'AUTHORS?', '<H2>',
'BUGS', '<H2>',
'COMPATIBILITY', '<H2>',
'DEPENDENCIES', '<H2>',
'DESCRIPTION', '<H2>',
'DIAGNOSTICS', '<H2>',
'ENVIRONMENT', '<H2>',
'ERRORS', '<H2>',
'EXAMPLES', '<H2>',
'EXTERNAL INFLUENCES', '<H2>',
'FILES', '<H2>',
'LIMITATIONS', '<H2>',
'NAME', '<H2>',
'NOTES?', '<H2>',
'OPTIONS', '<H2>',
'REFERENCES', '<H2>',
'RETURN VALUE', '<H2>',
'SECTION.*:', '<H2>',
'SEE ALSO', '<H2>',
'STANDARDS CONFORMANCE', '<H2>',
'STYLE CONVENTION', '<H2>',
'SYNOPSIS', '<H2>',
'SYNTAX', '<H2>',
'WARNINGS', '<H2>',
'\s+Section.*:', '<H3>',
);
$HeadFallback = '<H2>'; # Fallback tag if above is not found.
##---------------------------------------------------------------------------
##-----------##
## MAIN Body ##
##-----------##
{
&get_cli_opts();
## Check if processing a keyord search
if ($K && $CGIURL) { &man_k(); exit 0; }
local($line,$tmp,$i,$head,$preindent,$see_also);
$see_also = 1;
if (!$BARE) {
print STDOUT "<HTML>\n";
print STDOUT "<HEAD>\n",
"<TITLE>$TITLE</TITLE>\n",
"</HEAD>\n" if $TITLE;
print STDOUT "<BODY>\n";
print STDOUT "<H1>$TITLE</H1>\n",
"<HR>\n" if $TITLE;
}
print STDOUT "<PRE>\n";
while(!eof(STDIN)) {
for ($i=0; $i < $hdsz; $i++) { <STDIN>; }
for ($i=0; $i < $txsz; $i++) {
$_ = <STDIN>;
## Try to check if line space is needed at page boundaries ##
if (!$NODEPAGE && ($i==0 || $i==($txsz-1)) && !/^\s*$/) {
/^(\s*)/; $tmp = length($1);
if ($do) {
if ($tmp < $preindent) { print STDOUT "\n"; }
} else {
$do = 1;
}
$preindent = $tmp;
} else {
$do = 0; $preindent = 0;
}
## Interpret line
$line = $_;
&entitize(*_); # Convert [$<>] to entity references
## Create anchor links for manpage references
s/((((.$bs)+)?[\+_\.\w-])+\(((.$bs)+)?\d((.$bs)+)?\w?\))/&make_xref($1)/oge
if $CGIURL && $see_also;
## Emphasize underlined words
s/((_\010[^_])+[\.\(\)_]?(_\010[^_])+\)?)/&emphasize($1)/oge;
$secth = 0;
## Check for strong text and headings
if ($SUN || /.\010./o) {
if (!$NOHEADS) {
$line =~ s/.\010//go;
$tmp = $HeadFallback;
foreach $head (keys %SectionHead) {
if ($line =~ /^$leftm$head/) {
$tmp = $SectionHead{$head};
$secth = 1;
last;
}
}
if ($secth || $line =~ /^$leftm\S/o) {
if ($CGIURL && $SEEALSO) {
if ($line =~ /SEE ALSO/o) { $see_also = 1; }
else { $see_also = 0; }
}
chop $line;
$_ = $tmp . $line . $tmp;
s%<([^>]*)>$%</$1>%;
$_ = "\n</PRE>\n" . $_ . "<PRE>\n";
} else {
s/(((.\010)+.)+)/&strongize($1)/oge;
}
} else {
s/(((.\010)+.)+)/&strongize($1)/oge;
}
}
print STDOUT;
}
for ($i=0; $i < $ftsz; $i++) { <STDIN>; }
}
print STDOUT "</PRE>\n",
"</BODY>\n",
"</HTML>\n" unless $BARE;
exit 0;
} ## End Main
##---------------------------------------------------------------------------
sub get_cli_opts {
&usage unless
&NGetOpt(
"botm=i", # Number of lines for bottom margin (def: 7)
"headmap=s", # Filename of user section head map file
"leftm=i", # Character width of left margin (def: 0)
"nodepage", # Do not remove pagination lines
"noheads", # Do not detect for section heads
"pgsize=i", # Number of lines in a page (def: 66)
"title=s", # Title of manpage (def: Not defined)
"topm=i", # Number of lines for top margin (def: 7)
"sun", # Section heads are not overstriked in input
"cgiurl=s", # CGI URL for linking to other manpages
"seealso", # Link to other manpages only in the SEE ALSO section
"k", # Process input from 'man -k' output.
"bare", # Leave out HTML, HEAD, BODY tags.
"help" # Short usage message
);
&usage() if defined($opt_help);
$pgsz = ($opt_pgsize ? $opt_pgsize : 66);
if (defined($opt_nodepage)) {
$hdsz = 0;
$ftsz = 0;
} else {
$hdsz = (defined($opt_topm) ? $opt_topm : 7);
$ftsz = (defined($opt_botm) ? $opt_botm : 7);
}
$txsz = $pgsz - ($hdsz + $ftsz);
$leftmsz = (defined($opt_leftm) ? $opt_leftm : 0);
$leftm = ' ' x $leftmsz;
$TITLE = ($opt_title ? $opt_title : "");
$NOHEADS = (defined($opt_noheads) ? 1 : 0);
$SUN = (defined($opt_sun) ? 1 : 0);
$CGIURL = ($opt_cgiurl ? $opt_cgiurl : "");
$SEEALSO = ($opt_seealso ? 1 : 0);
$K = ($opt_k ? 1 : 0);
$BARE = ($opt_bare ? 1 : 0);
if (defined($opt_headmap)) {
require $opt_headmap || warn "Unable to read $opt_headmap\n";
}
}
##---------------------------------------------------------------------------
sub emphasize {
local($txt) = shift;
$txt =~ s/.\010//go;
$txt = "<EM>$txt</EM>";
$txt;
}
##---------------------------------------------------------------------------
sub strongize {
local($txt) = shift;
$txt =~ s/.\010//go;
$txt = "<STRONG>$txt</STRONG>";
$txt;
}
##---------------------------------------------------------------------------
sub entitize {
local(*txt) = shift;
## Check for special characters in overstrike text ##
$txt =~ s/_\010\&/&strike('_', '&')/geo;
$txt =~ s/_\010</&strike('_', '<')/geo;
$txt =~ s/_\010>/&strike('_', '>')/geo;
$txt =~ s/(\&\010)+\&/&strike('&', '&')/geo;
$txt =~ s/(<\010)+</&strike('<', '<')/geo;
$txt =~ s/(>\010)+>/&strike('>', '>')/geo;
## Check for special characters in regular text. Must be careful
## to check before/after character in expression because it might be
## a special character.
$txt =~ s/([^\010]\&[^\010])/&htmlize2($1)/geo;
$txt =~ s/([^\010]<[^\010])/&htmlize2($1)/geo;
$txt =~ s/([^\010]>[^\010])/&htmlize2($1)/geo;
}
##---------------------------------------------------------------------------
## htmlize2() is used by entitize.
##
sub htmlize2 {
local($str) = shift;
$str =~ s/&/\&/g;
$str =~ s/</\</g;
$str =~ s/>/\>/g;
$str;
}
##---------------------------------------------------------------------------
## strike converts HTML special characters in overstriked text
## into entity references. The entities are overstriked so
## strongize() and emphasize() will recognize the entity to be
## wrapped in <STRONG>/<EM> tags.
##
sub strike {
local($w, $char) = @_;
local($ret);
if ($w eq '_') {
if ($char eq '&') {
$ret = "_$bs\&_${bs}a_${bs}m_${bs}p_${bs};";
} elsif ($char eq '<') {
$ret = "_$bs\&_${bs}l_${bs}t_${bs};";
} elsif ($char eq '>') {
$ret = "_$bs\&_${bs}g_${bs}t_${bs};";
} else {
warn qq|Unrecognized character, "$char", passed to strike()\n|;
}
} else {
if ($char eq '&') {
$ret = "\&$bs\&a${bs}am${bs}mp${bs}p;${bs};";
} elsif ($char eq '<') {
$ret = "\&$bs\&l${bs}lt${bs}t;${bs};";
} elsif ($char eq '>') {
$ret = "\&$bs\&g${bs}gt${bs}t;${bs};";
} else {
warn qq|Unrecognized character, "$char", passed to strike()\n|;
}
}
$ret;
}
##---------------------------------------------------------------------------
## make_xref() was originally added to man2html by Maurice Cinquini
## <mauricec@tplrd.tpl.oz.au> for use in the SEE ALSO section. The
## code has been modified to handle more general cases, and the routine
## is called for all manpage cross-references throughout.
##
## Specifically, I modified it to support the user's URL template for
## linking to other manpages, support for [+_,-] in the title name,
## and to handle <EM> tagging.
##
sub make_xref {
local($str) = shift;
$str =~ s/.\010//go; # Remove overstriking
local($title,$section,$subsection) =
($str =~ /([\+_\.\w-]+)\((\d)(\w?)\)/);
local($href) = (eval "\"$CGIURL\"");
qq|<STRONG><A HREF="$href">$str</A></STRONG>|;
}
##---------------------------------------------------------------------------
## man_k() process a keyword search.
##
sub man_k {
local($line,$refs,$section,$subsection,$desc,$i,
%Sec1, %Sec1sub, %Sec2, %Sec2sub, %Sec3, %Sec3sub,
%Sec4, %Sec4sub, %Sec5, %Sec5sub, %Sec6, %Sec6sub,
%Sec7, %Sec7sub, %Sec8, %Sec8sub, %Sec9, %Sec9sub,
%SecN, %SecNsub, %SecNsec);
print STDOUT "<HTML>\n";
print STDOUT "<HEAD>\n",
"<TITLE>$TITLE</TITLE>\n",
"</HEAD>\n" if $TITLE;
print STDOUT "<BODY>\n";
print STDOUT "<H1>$TITLE</H1>\n",
"<HR>\n" if $TITLE;
while ($line = <STDIN>) {
next if $line !~ /\(\d\w?\)\s*-/;
($refs,$section,$subsection,$desc) =
$line =~ /^\s*(.*)\((\d)(\w?)\)\s*-\s*(.*)$/;
$refs =~ s/\s(and|or)\s/,/gi; # Convert and/or to commas
$refs =~ s/\s//g; # Remove all whitespace
$refs =~ s/,/, /g; # Put space after comma
&htmlize(*desc); # Check for special chars in desc
$desc =~ s/^(.)/\U$1/; # Uppercase first letter in desc
if ($section eq '1') {
$Sec1{$refs} = $desc; $Sec1sub{$refs} = $subsection;
} elsif ($section eq '2') {
$Sec2{$refs} = $desc; $Sec2sub{$refs} = $subsection;
} elsif ($section eq '3') {
$Sec3{$refs} = $desc; $Sec3sub{$refs} = $subsection;
} elsif ($section eq '4') {
$Sec4{$refs} = $desc; $Sec4sub{$refs} = $subsection;
} elsif ($section eq '5') {
$Sec5{$refs} = $desc; $Sec5sub{$refs} = $subsection;
} elsif ($section eq '6') {
$Sec6{$refs} = $desc; $Sec6sub{$refs} = $subsection;
} elsif ($section eq '7') {
$Sec7{$refs} = $desc; $Sec7sub{$refs} = $subsection;
} elsif ($section eq '8') {
$Sec8{$refs} = $desc; $Sec8sub{$refs} = $subsection;
} elsif ($section eq '9') {
$Sec9{$refs} = $desc; $Sec9sub{$refs} = $subsection;
} else { # Catch all
$SecN{$refs} = $desc; $SecNsec{$refs} = $section;
$SecNsub{$refs} = $subsection;
}
}
&print_mank_sec(*Sec1, 1, *Sec1sub);
&print_mank_sec(*Sec2, 2, *Sec2sub);
&print_mank_sec(*Sec3, 3, *Sec3sub);
&print_mank_sec(*Sec4, 4, *Sec4sub);
&print_mank_sec(*Sec5, 5, *Sec5sub);
&print_mank_sec(*Sec6, 6, *Sec6sub);
&print_mank_sec(*Sec7, 7, *Sec7sub);
&print_mank_sec(*Sec8, 8, *Sec8sub);
&print_mank_sec(*Sec9, 9, *Sec9sub);
&print_mank_sec(*SecN, 'N', *SecNsub, *SecNsec);
print STDOUT "</DL>\n",
"</BODY>\n",
"</HTML>\n";
}
##---------------------------------------------------------------------------
## print_mank_sec() prints out manpage cross-refs of a specific section.
sub print_mank_sec {
local(*sec, $sect, *secsub, *secsec) = @_;
local(@array, @refs, $href, $item, $title, $subsection, $i, $section);
$section = $sect;
@array = sort keys %sec;
if ($#array >= 0) {
print STDOUT "<H2>Section $section</H2>\n",
"<DL>\n";
foreach $item (@array) {
$section = $secsec{$item} if $sect eq 'N';
@refs = split(/,/,$item);
$title = $refs[0];
$title =~ s/\(\)//g; # Watch out for extra ()'s
$subsection = $secsub{$item};
$href = eval "\"$CGIURL\""; # Create HREF string
print STDOUT "<DT>\n";
$i = 0;
foreach (@refs) {
print STDOUT qq|<A HREF="$href">$_</A>|;
print STDOUT ", " if $i < $#refs;
$i++;
}
print STDOUT " ($section$subsection)\n",
"<DD>\n",
$sec{$item}, "\n";
}
print STDOUT "</DL>\n";
}
}
##---------------------------------------------------------------------------
sub htmlize {
local(*str) = shift;
$str =~ s/&/\&/g;
$str =~ s/</\</g;
$str =~ s/>/\>/g;
$str;
}
##---------------------------------------------------------------------------
sub usage {
print STDOUT <<EndOfUsage;
Usage: $PROG [ options ] < infile > outfile
Options:
-bare : Do not put in HTML, HEAD, BODY tags
-botm <#> : Number of lines for bottom margin (def: 7)
-cgiurl <url> : URL for linking to other manpages
-headmap <file> : Filename of user section head map file
-help : This message
-k : Process a keyword search result
-leftm <#> : Character width of left margin (def: 0)
-nodepage : Do not remove pagination lines
-noheads : Do not detect for section heads
-pgsize <#> : Number of lines in a page (def: 66)
-seealso : Link to other manpages only in the SEE ALSO section
-sun : Section heads are not overstriked in input
-title <string> : Title of manpage (def: Not defined)
-topm <#> : Number of line for top margin (def: 7)
Description:
$PROG takes formatted manpages from STDIN and converts it to HTML sent to
STDOUT. The -topm and -botm arguments are the number of lines to the main
body text and NOT to the running headers/footers.
Version:
$VERSION
EndOfUsage
exit 0;
}